In [ ]:
from __future__ import division
import codecs
import pickle
import networkx as nx
from collections import Counter
# Notebook-wide plotting defaults: figure size and font family.
# `rcParams` is not imported in the visible cells; it is expected to be in
# scope already (presumably from a pylab-style session) -- TODO confirm.
rcParams.update({
    'figure.figsize': (12.0, 10.0),
    'font.family': 'Times New Roman',
})
In [ ]:
from os.path import abspath, dirname

# Project root: the parent of the current working directory.
# `dirname` replaces the manual split on '/' so the result is also correct
# on platforms whose path separator is not '/', where the split produced
# an empty string.
workspace = dirname(abspath('.'))
Note: Make sure that your workspace sees the root directory of openie_eval.
In [ ]:
from openie_eval.openie_eval import semantic_parsing as sp
from openie_eval.openie_eval import ontologization
from nltk.stem import WordNetLemmatizer

# Reload both project modules so the session uses their current on-disk
# versions (relies on the Python 2 builtin `reload`).
for project_module in (sp, ontologization):
    reload(project_module)

# Single lemmatizer instance shared by the cells below.
lemmatizer = WordNetLemmatizer()
In [ ]:
# Domain under analysis; it selects every input and output path below.
keyword = 'carnatic_music'

# Ground-truth page titles: one per line, stripped and lower-cased.
# The file is read inside a `with` block so the handle is always closed.
pages_path = workspace + '/data/ground-truth/' + keyword + '_pages.txt'
with codecs.open(pages_path, encoding='utf-8') as pages_file:
    wiki_entities = [line.strip().lower() for line in pages_file.readlines()]

# Extraction methods being compared, their legend labels, and one bar colour
# per method (colours are matched to `methods` by position when plotting).
methods = ['reverb', 'openie', 'semantic-parsing']
labels = {'reverb': 'ReVerb', 'openie': 'OpenIE 4.0', 'semantic-parsing': 'Sem. Parsing'}
colors = ['#990033', '#006600', '#330066']

# File-name suffix toggles; swap in the commented line to switch variant.
#coref_suffix = ''
coref_suffix = '-coref'
filtered_suffix = ''
#filtered_suffix = '-filtered'

# Entity-identification rules. The pickle is opened in binary mode and closed
# deterministically instead of leaking a `file(...)` handle.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
rules_path = (workspace + '/data/results/qualitative/entity-identification/rule-based/'
              + keyword + '/rules.pickle')
with open(rules_path, 'rb') as rules_file:
    rules = pickle.load(rules_file)

# Ground truth restricted to the classes that have rules.
# NOTE(review): the return type of load_groundtruth is not visible here; the
# cells below only rely on it exposing `.keys()`.
groundtruth = ontologization.load_groundtruth(keyword, rules.keys())
In [ ]:
# Class-wise: for every ground-truth class, count the distinct non-empty
# relation types found in each method's column of that class's TSV file.
# Column indices mirror the original indexing (0: reverb, 1: openie,
# 2: semantic-parsing).
method_columns = (('reverb', 0), ('openie', 1), ('semantic-parsing', 2))
relation_dir = workspace + '/data/results/qualitative/semantic-relation-extraction/'

method_counts = {method: [] for method in methods}
for class_type in groundtruth.keys():
    tsv_file = relation_dir + class_type + '.tsv'
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = loadtxt(tsv_file, delimiter='\t', dtype='str', ndmin=2)
    if len(data) == 0:
        # Empty file: every method gets a zero so the lists stay aligned
        # with the class order.
        for method in methods:
            method_counts[method].append(0)
        continue
    for method, column in method_columns:
        # unique() drops duplicates; the comparison discards the empty entry.
        method_counts[method].append(sum(unique(data[:, column]) != ''))
In [ ]:
# All-together: append one more entry per method, computed from the
# keyword-level TSV, to the per-class lists built in the class-wise cell.
# Re-running this cell appends again, so run it once after the class-wise cell.
tsv_file = workspace + '/data/results/qualitative/semantic-relation-extraction/' + keyword + '.tsv'
# ndmin=2 keeps a single-row file two-dimensional so column slicing is safe.
data = loadtxt(tsv_file, delimiter='\t', dtype='str', ndmin=2)

# Fixed correction subtracted from every method's count.
# NOTE(review): the source does not say what the 7 / 6 discounted entries
# are -- confirm before reusing these numbers for another keyword.
n_subtract = 7 if keyword == 'carnatic_music' else 6

# Column indices mirror the original indexing of the TSV.
for method, column in (('reverb', 0), ('openie', 1), ('semantic-parsing', 2)):
    method_counts[method].append(sum(unique(data[:, column]) != '') - n_subtract)
In [ ]:
# Bare expression: displays the per-method count lists as the cell output.
method_counts
In [ ]:
# Grouped bar chart: one group per ground-truth class plus a final 'all'
# group, one bar per extraction method, showing the relation-type counts.
rcParams['figure.figsize'] = (12.0, 10.0)
fig, ax = plt.subplots()

bar_width = 0.2
fontsize = 30
index = arange(len(groundtruth.keys()) + 1)  # +1 slot for the 'all' group

# Each method's bars are drawn one bar-width to the right of the previous one.
for method, color in zip(methods, colors):
    counts = method_counts[method]
    rects = bar(index, counts, width=bar_width, color=color, label=labels[method])
    index = index + bar_width

xlabel('Concepts', fontsize=fontsize + 2)
ylabel('#Relation types', fontsize=fontsize + 2)

# Drop a fixed-length leading prefix from each class name for the tick labels.
# NOTE(review): 9 / 11 presumably match the length of the keyword-specific
# prefix of the class names -- confirm.
prefix_length = 9 if keyword == 'carnatic_music' else 11
tick_names = [name[prefix_length:] for name in groundtruth.keys()] + ['all']
# `index` has been shifted by three bar widths; step back 1.5 of them.
xticks(index - 1.5 * bar_width, tick_names)

legend(prop={'size': fontsize}, loc='upper left', fancybox=True)
xticks(fontsize=fontsize, rotation=18)
yticks(fontsize=fontsize)
In [ ]:
# Write the current figure as both PDF and PNG, using identical settings.
fname = workspace + '/data/results/qualitative/semantic-relation-extraction/' + keyword
for extension in ('.pdf', '.png'):
    savefig(fname + extension, dpi=200, facecolor='w', edgecolor='w',
            orientation='landscape', papertype=None, format=None,
            transparent=False, bbox_inches='tight', pad_inches=0.1)
In [ ]:
# Close all figures before the next plot is built (presumably pyplot's
# `close`, taken from the pylab-style namespace -- confirm).
close('all')
In [ ]:
# Class-wise: for every ground-truth class, collect the distinct relation
# strings found in each method's column of that class's TSV file.
# Column indices mirror the original indexing (0: reverb, 1: openie,
# 2: semantic-parsing).
method_columns = (('reverb', 0), ('openie', 1), ('semantic-parsing', 2))
relation_dir = workspace + '/data/results/qualitative/semantic-relation-extraction/'

method_rels = {method: {} for method in methods}
for class_type in groundtruth.keys():
    tsv_file = relation_dir + class_type + '.tsv'
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = loadtxt(tsv_file, delimiter='\t', dtype='str', ndmin=2)
    if len(data) == 0:
        # Classes with an empty file get no entry in any method's dict.
        continue
    for method, column in method_columns:
        # The unique values may still include the empty string.
        method_rels[method][class_type] = unique(data[:, column])
In [ ]:
# Bare expression: displays the per-method, per-class relation arrays as the
# cell output.
method_rels
In [ ]:
# For each method and ground-truth class, count the extracted assertions
# whose lemmatized relation is one of the relation types recorded for that
# class in `method_rels`. This rebinds `method_counts` to a fresh dict.
method_counts = {method: [] for method in methods}
for method, reldata in method_rels.items():
    # NOTE(review): '-filtered' is hard-coded here even though a
    # `filtered_suffix` setting exists in the configuration cell -- confirm
    # this is intentional.
    relations_path = (workspace + '/data/' + method + '/' + keyword
                      + '/relations' + coref_suffix + '-filtered.pickle')
    # Binary mode plus `with` so the handle is closed instead of leaked.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(relations_path, 'rb') as relations_file:
        relations = pickle.load(relations_file)

    # Normalise to [arg1, lemmatized verb, arg2] triples, all lower-cased.
    relations = [[item['arg1'].lower(),
                  lemmatizer.lemmatize(item['rel'].lower(), pos='v'),
                  item['arg2'].lower()]
                 for item in relations]

    # Tally every relation verb once, rather than rescanning the whole list
    # for each relation type of each class.
    verb_counts = Counter(triple[1] for triple in relations)

    for class_type in groundtruth.keys():
        if class_type not in reldata:
            # No relation types recorded for this class.
            method_counts[method].append(0)
            continue
        class_count = 0
        for rel in reldata[class_type]:
            # A missing key reads as 0 on a Counter.
            class_count += verb_counts[rel]
        method_counts[method].append(class_count)
In [ ]:
# Grouped bar chart: one group per ground-truth class, one bar per extraction
# method, showing the assertion counts.
rcParams['figure.figsize'] = (12.0, 10.0)
fig, ax = plt.subplots()

bar_width = 0.2
fontsize = 30
index = arange(len(groundtruth.keys()))

# Each method's bars are drawn one bar-width to the right of the previous one.
for method, color in zip(methods, colors):
    counts = method_counts[method]
    rects = bar(index, counts, width=bar_width, color=color, label=labels[method])
    index = index + bar_width

xlabel('Concepts', fontsize=fontsize + 2)
ylabel('#Assertions', fontsize=fontsize + 2)

# Drop a fixed-length leading prefix from each class name for the tick labels.
# NOTE(review): 9 / 11 presumably match the length of the keyword-specific
# prefix of the class names -- confirm.
prefix_length = 9 if keyword == 'carnatic_music' else 11
tick_names = [name[prefix_length:] for name in groundtruth.keys()]
# `index` has been shifted by three bar widths; step back 1.5 of them.
xticks(index - 1.5 * bar_width, tick_names)

# Legend anchored above the axes (y = 1.18 in axes coordinates).
legend(prop={'size': fontsize}, loc='upper center',
       bbox_to_anchor=(0.5, 1.18), fancybox=True)
xticks(fontsize=fontsize, rotation=18)
yticks(fontsize=fontsize)
In [ ]:
# Set the y-axis range of the current plot to 0..245 (presumably pyplot's
# `ylim` -- confirm).
ylim(0, 245)
In [ ]:
# Write the current figure as both PDF and PNG, using identical settings.
fname = workspace + '/data/results/qualitative/semantic-relation-extraction/' + keyword + '-relcount'
for extension in ('.pdf', '.png'):
    savefig(fname + extension, dpi=200, facecolor='w', edgecolor='w',
            orientation='landscape', papertype=None, format=None,
            transparent=False, bbox_inches='tight', pad_inches=0.1)
In [ ]:
# Close all figures once the plot has been saved (presumably pyplot's
# `close`, taken from the pylab-style namespace -- confirm).
close('all')